library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(bayesrules)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
We can eliminate a and b because the prior is not symmetrical or centered around .5. The prior is right-tailed, meaning beta > alpha, so we can eliminate f. This leaves c, d, and e, which have the same Beta distribution but different y and n. The likelihood is symmetrical around pi = 0.50, so the correct choice is e, where y is half of n. Below I’ve plotted e to show it matches.
# Choice (e): Beta(3, 8) prior with y = 2 successes in n = 4 trials
plot_beta_binomial(y = 2, n = 4, alpha = 3, beta = 8)
Kimya’s prior indicates that she thinks there’s a chance that the shop is closed but she is unsure.
# Kimya's prior: Beta(1, 2)
plot_beta(1, 2)
Fernando’s prior. Fernando thinks that it is more likely the shop is closed, but is unsure. He is more sure it’s closed than Kimya.
# Fernando's prior: Beta(0.5, 1)
plot_beta(alpha = 0.5, beta = 1)
Ciara’s prior. Ciara is very sure the shop is not open.
# Ciara's prior: Beta(3, 10)
plot_beta(3, 10)
Taylor’s prior. Taylor is sure the shop is open.
# Taylor's prior: Beta(2, 0.1)
plot_beta(2, 0.10)
#Kimya's simulation
set.seed(1000)
# Step 1: draw 10,000 values of pi from Kimya's Beta(1, 2) prior
kimya_sim <- data.frame(pi = rbeta(10000, 1, 2))
# Step 2: for each pi, simulate y successes out of 7 "trials"
kimya_sim <- mutate(kimya_sim, y = rbinom(10000, size = 7, prob = pi))
# Keep only the simulated pairs that match our observed data (y = 3)
kimya_posterior <- filter(kimya_sim, y == 3)
# Plot the remaining pi values
ggplot(kimya_posterior, aes(x = pi)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Posterior mean and sd estimated from the retained draws
summarize(kimya_posterior, mean(pi), sd(pi))
## mean(pi) sd(pi)
## 1 0.400521 0.1500653
#Fernando's simulation
set.seed(2000)
# Draw 10,000 values of pi from Fernando's Beta(0.5, 1) prior
fernando_sim <- data.frame(pi = rbeta(10000, 0.50, 1))
# For each pi, simulate y successes out of 7 trials
fernando_sim <- mutate(fernando_sim, y = rbinom(10000, size = 7, prob = pi))
# Keep only draws consistent with the observed data (y = 3)
fernando_posterior <- filter(fernando_sim, y == 3)
# Plot the remaining pi values
ggplot(fernando_posterior, aes(x = pi)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Posterior mean and sd estimated from the retained draws
summarize(fernando_posterior, mean(pi), sd(pi))
## mean(pi) sd(pi)
## 1 0.4126212 0.1615311
#Ciara's simulation
set.seed(3000)
# Draw 10,000 values of pi from Ciara's Beta(3, 10) prior
ciara_sim <- data.frame(pi = rbeta(10000, 3, 10))
# For each pi, simulate y successes out of 7 trials
ciara_sim <- mutate(ciara_sim, y = rbinom(10000, size = 7, prob = pi))
# Keep only draws consistent with the observed data (y = 3)
ciara_posterior <- filter(ciara_sim, y == 3)
# Plot the remaining pi values
ggplot(ciara_posterior, aes(x = pi)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Posterior mean and sd estimated from the retained draws
summarize(ciara_posterior, mean(pi), sd(pi))
## mean(pi) sd(pi)
## 1 0.2963917 0.09966824
#Taylor's simulation
# FIX(review): the original chunk had no set.seed(), unlike the other three
# simulations, so its results were not reproducible. The output recorded
# below predates the seed and will not match a fresh run exactly.
set.seed(4000)
taylor_sim <- data.frame(pi = rbeta(10000, 2, 0.10)) |> #simulate 10000 values of pi from Taylor's Beta(2, 0.1) prior
mutate(y = rbinom(10000, size = 7, prob = pi)) #size 7 indicates 7 "trials"
# Keep only the simulated pairs that match the observed data (y = 3)
taylor_posterior <- taylor_sim |>
filter(y == 3)
# Plot the pi values that match data
ggplot(taylor_posterior, aes(x = pi)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
taylor_posterior |>
summarize(mean(pi), sd(pi)) #estimating posterior mean and sd
## mean(pi) sd(pi)
## 1 0.5296116 0.1580043
We are given y = 3 and n = 7 and we will use these values to find the posterior for each of the above models.
The plots below show the posterior model and the mean of each posterior in purple. Summarize_beta_binomial shows the mean numerically.
# Exact posterior for each person's prior, given the observed data y = 3, n = 7.
# The purple vertical line marks the posterior mean reported by summarize_beta_binomial().
#Kimya's model
plot_beta_binomial(alpha = 1, beta = 2, y = 3, n = 7) + geom_vline(xintercept = .40, color = "purple")
summarize_beta_binomial(alpha = 1, beta = 2, y = 3, n = 7)
## model alpha beta mean mode var sd
## 1 prior 1 2 0.3333333 0.000 0.05555556 0.2357023
## 2 posterior 4 6 0.4000000 0.375 0.02181818 0.1477098
#Fernando's model
plot_beta_binomial(alpha = 0.50, beta = 1, y = 3, n = 7) + geom_vline(xintercept = 0.4117647, color = "purple")
summarize_beta_binomial(alpha = 0.50, beta = 1, y = 3, n = 7)
## model alpha beta mean mode var sd
## 1 prior 0.5 1 0.3333333 1.0000000 0.08888889 0.2981424
## 2 posterior 3.5 5 0.4117647 0.3846154 0.02549627 0.1596755
#Ciara's model
plot_beta_binomial(alpha = 3, beta = 10, y = 3, n = 7) + geom_vline(xintercept = 0.3000, color = "purple")
summarize_beta_binomial(alpha = 3, beta = 10, y = 3, n = 7)
## model alpha beta mean mode var sd
## 1 prior 3 10 0.2307692 0.1818182 0.01267963 0.1126039
## 2 posterior 6 14 0.3000000 0.2777778 0.01000000 0.1000000
#Taylor's model
plot_beta_binomial(alpha = 2, beta = 0.10, y = 3, n = 7) + geom_vline(xintercept = 0.5494505, color = "purple")
summarize_beta_binomial(alpha = 2, beta = 0.10, y = 3, n = 7)
## model alpha beta mean mode var sd
## 1 prior 2 0.1 0.9523810 1.0000000 0.01462951 0.1209525
## 2 posterior 5 4.1 0.5494505 0.5633803 0.02451036 0.1565579
Comparing the simulations to the actual posteriors shows that the actual posteriors are very close to the simulated posteriors; they are all within 1 standard deviation of the estimate.
# Matching plots to descriptions: each call overlays prior, scaled likelihood,
# and posterior for the given Beta prior and Binomial data.
#a
plot_beta_binomial(alpha = 1, beta = 4, y = 8 , n = 10) #prior is to the left of the likelihood and posterior. y and n indicate the likelihood will be centered around 0.8
#b
plot_beta_binomial(alpha = 20, beta = 3, y = 0, n = 1) #Prior and posterior are to the right of the likelihood, which will be in the shape of a line given y and n.
#c
plot_beta_binomial(alpha = 4, beta = 2, y = 1, n = 3) #Prior is slightly left skewed given alpha > beta.
#d
plot_beta_binomial(alpha = 4, beta = 2, y = 10, n = 13) #Likelihood is slightly left skewed and so is the prior. Both are centered around 0.75.
#e
plot_beta_binomial(alpha = 20, beta = 2, y = 10, n=200) #Likelihood will have a sharp peak because of high n and is centered below 0.25. Prior will be left-skewed and is centered above 0.75.
# Prior check for Beta(7, 2): plot with mean and mode marked, numeric summary,
# and an approximate quantile spread from 1,000 random draws.
plot_beta(7,2, mean = TRUE, mode = TRUE)
summarize_beta(7,2)
## mean mode var sd
## 1 0.7777778 0.8571429 0.01728395 0.1314684
# NOTE(review): no set.seed() before rbeta(), so these quantiles change on re-run.
quantile(rbeta(1000, 7,2))
## 0% 25% 50% 75% 100%
## 0.3248465 0.7007416 0.8001489 0.8850535 0.9986264
# Update the Beta(7, 2) prior with y = 19 successes in n = 20 trials.
plot_beta_binomial(alpha = 7, beta = 2, y = 19, n = 20)
summarize_beta_binomial(alpha = 7, beta = 2, y = 19, n = 20)
## model alpha beta mean mode var sd
## 1 prior 7 2 0.7777778 0.8571429 0.017283951 0.13146844
## 2 posterior 26 3 0.8965517 0.9259259 0.003091558 0.05560178
With this data, my understanding of pi changed by increasing to 0.896 from 0.777. My certainty of pi has also increased since the likelihood will be narrower based on the data.
# Same Beta(7, 2) prior but with surprising data: only y = 1 success in n = 20.
plot_beta_binomial(alpha = 7, beta = 2, y = 1, n = 20)
summarize_beta_binomial(alpha = 7, beta = 2, y = 1, n = 20)
## model alpha beta mean mode var sd
## 1 prior 7 2 0.7777778 0.8571429 0.01728395 0.1314684
## 2 posterior 8 21 0.2758621 0.2592593 0.00665874 0.0816011
The posterior mean decreased to 0.275.
# Beta(7, 2) prior with an even split in the data: y = 10 of n = 20.
plot_beta_binomial(alpha = 7, beta = 2, y = 10, n = 20)
summarize_beta_binomial(alpha = 7, beta = 2, y = 10, n = 20)
## model alpha beta mean mode var sd
## 1 prior 7 2 0.7777778 0.8571429 0.017283951 0.13146844
## 2 posterior 17 12 0.5862069 0.5925926 0.008085612 0.08992003
We know alpha + y = 8.5, so we’ll solve for y: y = 8.5 - alpha = 8. For n we know 0.5 + n - y = 2.5, or 0.5 + n - 8 = 2.5, so n = 10.
# Part (a): the Beta(0.5, 0.5) prior with the recovered data y = 8, n = 10
# reproduces the stated posterior Beta(8.5, 2.5).
summarize_beta_binomial(alpha = .5, beta = .5, y = 8, n = 10)
## model alpha beta mean mode var sd
## 1 prior 0.5 0.5 0.5000000 0 and 1 0.12500000 0.3535534
## 2 posterior 8.5 2.5 0.7727273 0.833333333333333 0.01463499 0.1209751
plot_beta_binomial(alpha = .5, beta = .5, y = 8, n = 10)
We will use the above process to find the rest of y and n using vectors.
# Recover the observed data (y, n) for parts b-f from each prior/posterior
# pair, using posterior alpha = alpha + y and posterior beta = beta + n - y.
# (Assignment switched from `=` to the idiomatic `<-`; values are unchanged.)
#finding y for b-f
alpha <- c(0.5, 10, 8, 2, 1)    # prior alphas for b-f
posta <- c(3.5, 12, 15, 5, 30)  # posterior alphas for b-f
y <- posta - alpha              # y = (posterior alpha) - (prior alpha)
#finding n for b-f
beta <- c(0.5, 1, 3, 2, 1)      # prior betas for b-f
postb <- c(10.5, 15, 6, 5, 3)   # posterior betas for b-f
n <- postb - beta + y           # n = (posterior beta) - (prior beta) + y
Next I’ll plot these models.
# Plot and summarize the Beta-Binomial models for parts b-f,
# using the y and n values recovered above.
#plotting beta binomial
#b
plot_beta_binomial(alpha = .5, beta = .5, y = 3 , n = 13)
summarize_beta_binomial(alpha = .5, beta = .5, y = 3 , n = 13)
## model alpha beta mean mode var sd
## 1 prior 0.5 0.5 0.50 0 and 1 0.1250 0.3535534
## 2 posterior 3.5 10.5 0.25 0.208333333333333 0.0125 0.1118034
#c
plot_beta_binomial(alpha = 10, beta = 1, y = 2, n = 16)
summarize_beta_binomial(alpha = 10, beta = 1, y = 2, n = 16)
## model alpha beta mean mode var sd
## 1 prior 10 1 0.9090909 1.00 0.006887052 0.08298827
## 2 posterior 12 15 0.4444444 0.44 0.008818342 0.09390603
#d
plot_beta_binomial(alpha = 8, beta = 3, y = 7, n = 10)
summarize_beta_binomial(alpha = 8, beta = 3, y = 7, n = 10)
## model alpha beta mean mode var sd
## 1 prior 8 3 0.7272727 0.7777778 0.016528926 0.12856487
## 2 posterior 15 6 0.7142857 0.7368421 0.009276438 0.09631427
#e
plot_beta_binomial(alpha = 2, beta = 2, y = 3, n = 6)
summarize_beta_binomial(alpha = 2, beta = 2, y = 3, n = 6)
## model alpha beta mean mode var sd
## 1 prior 2 2 0.5 0.5 0.05000000 0.2236068
## 2 posterior 5 5 0.5 0.5 0.02272727 0.1507557
#f
plot_beta_binomial(alpha = 1, beta = 1, y = 29, n = 31)
summarize_beta_binomial(alpha = 1, beta = 1, y = 29, n = 31)
## model alpha beta mean mode var sd
## 1 prior 1 1 0.5000000 NaN 0.083333333 0.28867513
## 2 posterior 30 3 0.9090909 0.9354839 0.002430724 0.04930238
# Flat Beta(1, 1) prior updated with each dataset (a)-(e); the posterior
# tracks the data alone since the flat prior contributes no information.
#a
summarize_beta_binomial(alpha = 1, beta = 1, y = 10, n = 13)
## model alpha beta mean mode var sd
## 1 prior 1 1 0.5000000 NaN 0.08333333 0.2886751
## 2 posterior 11 4 0.7333333 0.7692308 0.01222222 0.1105542
plot_beta_binomial(alpha = 1, beta = 1, y = 10, n = 13) #posterior is Beta(11,4)
#b
summarize_beta_binomial(alpha = 1, beta = 1, y = 0, n = 1)
## model alpha beta mean mode var sd
## 1 prior 1 1 0.5000000 NaN 0.08333333 0.2886751
## 2 posterior 1 2 0.3333333 0 0.05555556 0.2357023
plot_beta_binomial(alpha = 1, beta = 1, y = 0, n = 1) #posterior is Beta(1,2)
#c
summarize_beta_binomial(alpha = 1, beta = 1, y = 100, n = 130)
## model alpha beta mean mode var sd
## 1 prior 1 1 0.5000000 NaN 0.083333333 0.28867513
## 2 posterior 101 31 0.7651515 0.7692308 0.001351088 0.03675715
plot_beta_binomial(alpha = 1, beta = 1, y = 100, n = 130) #posterior is Beta(101, 31)
#d
summarize_beta_binomial(alpha = 1, beta = 1, y = 20, n = 120)
## model alpha beta mean mode var sd
## 1 prior 1 1 0.5000000 NaN 0.083333333 0.28867513
## 2 posterior 21 101 0.1721311 0.1666667 0.001158553 0.03403752
plot_beta_binomial(alpha = 1, beta = 1, y = 20, n = 120) #posterior is Beta(21, 101)
#e
summarize_beta_binomial(alpha = 1, beta = 1, y = 234, n = 468)
## model alpha beta mean mode var sd
## 1 prior 1 1 0.5 NaN 0.0833333333 0.28867513
## 2 posterior 235 235 0.5 0.5 0.0005307856 0.02303878
plot_beta_binomial(alpha = 1, beta = 1, y = 234, n = 468) #posterior is Beta(235,235)
# Informative Beta(10, 2) prior with the same datasets (a)-(e) — compare these
# posteriors with the flat-prior versions above to see the prior's pull.
#a
summarize_beta_binomial(alpha = 10, beta = 2, y = 10, n = 13)
## model alpha beta mean mode var sd
## 1 prior 10 2 0.8333333 0.900000 0.010683761 0.10336228
## 2 posterior 20 5 0.8000000 0.826087 0.006153846 0.07844645
plot_beta_binomial(alpha = 10, beta = 2, y = 10, n = 13) #posterior is Beta(20,5)
#b
summarize_beta_binomial(alpha = 10, beta = 2, y = 0, n = 1)
## model alpha beta mean mode var sd
## 1 prior 10 2 0.8333333 0.9000000 0.01068376 0.1033623
## 2 posterior 10 3 0.7692308 0.8181818 0.01267963 0.1126039
plot_beta_binomial(alpha = 10, beta = 2, y = 0, n = 1) #posterior is Beta(10,3)
#c
summarize_beta_binomial(alpha = 10, beta = 2, y = 100, n = 130)
## model alpha beta mean mode var sd
## 1 prior 10 2 0.8333333 0.9000000 0.010683761 0.10336228
## 2 posterior 110 32 0.7746479 0.7785714 0.001220759 0.03493936
plot_beta_binomial(alpha = 10, beta = 2, y = 100, n = 130) #posterior is Beta(110, 32)
#d
summarize_beta_binomial(alpha = 10, beta = 2, y = 20, n = 120)
## model alpha beta mean mode var sd
## 1 prior 10 2 0.8333333 0.9000000 0.01068376 0.1033623
## 2 posterior 30 102 0.2272727 0.2230769 0.00132045 0.0363380
plot_beta_binomial(alpha = 10, beta = 2, y = 20, n = 120) #posterior is Beta(30, 102)
#e
summarize_beta_binomial(alpha = 10, beta = 2, y = 234, n = 468)
## model alpha beta mean mode var sd
## 1 prior 10 2 0.8333333 0.9000000 0.0106837607 0.10336228
## 2 posterior 244 236 0.5083333 0.5083682 0.0005196061 0.02279487
plot_beta_binomial(alpha = 10, beta = 2, y = 234, n = 468) #posterior is Beta(244,236)
#plotting uniform distribution with code modified from https://www.statology.org/plot-uniform-distribution-in-r/
# NOTE(review): this reuses the global names x and y from the earlier
# exercise; they are not needed again here, but fresh names would be safer.
#define x-axis
x <- seq(0.0, 1.5, length=50)
#calculate uniform distribution probabilities
y <- dunif(x, min = 0.50, max = 1)
#plot uniform distribution
plot(x, y, type = 'l')
The politician thinks that his approval rating is equally likely to be any value between 50% and 100%.
We construct a formula and sketch the posterior understanding if y = 0 and n = 100, using pi | (Y = y) ~ Beta(alpha + y, beta + n - y).
posterior alpha = 0.5 + 0, posterior beta = 1 + 100 - 0 so posterior is Beta(0.50, 101)
# Posterior Beta(0.5, 101) derived above: 0 successes in 100 trials pushes
# the approval estimate to nearly zero.
plot_beta(0.50, 101)
summarize_beta(0.50, 101)
## mean mode var sd
## 1 0.004926108 0 4.782285e-05 0.006915407
plot_beta_binomial(alpha = 1, beta = 1, y = 0, n = 100) #uniform prior that allows for all values 0 < pi < 1
plot_beta_binomial(alpha = 20, beta = 10, y = 0, n = 100) #beta prior that shows support for politician may be high
# Sequential updating after each of 4 observations (success, success,
# failure, success), reusing each posterior as the next prior.
# NOTE(review): steps (b)-(d) pass the CUMULATIVE (y, n) together with the
# already-updated prior, so earlier observations are counted twice. With one
# new observation per step (y = 1 or 0, n = 1), the final posterior from the
# Beta(2, 3) prior with 3 successes in 4 trials would be Beta(5, 4), not
# Beta(10, 5) — verify against the intended exercise.
#a
plot_beta_binomial(alpha = 2, beta = 3, y = 1, n = 1) #Updating after first observation is a success
summarize_beta_binomial(alpha = 2, beta = 3, y = 1, n = 1) #posterior Beta(3,3)
## model alpha beta mean mode var sd
## 1 prior 2 3 0.4 0.3333333 0.04000000 0.2000000
## 2 posterior 3 3 0.5 0.5000000 0.03571429 0.1889822
#b
plot_beta_binomial(alpha = 3, beta = 3, y = 2, n = 2) #updating after second success and using first posterior as updated prior
summarize_beta_binomial(alpha = 3, beta = 3, y =2, n = 2) #Posterior is Beta(5,3)
## model alpha beta mean mode var sd
## 1 prior 3 3 0.500 0.5000000 0.03571429 0.1889822
## 2 posterior 5 3 0.625 0.6666667 0.02604167 0.1613743
#c
plot_beta_binomial(alpha = 5, beta = 3, y = 2, n = 3) #update after failure and with a new prior
summarize_beta_binomial(alpha = 5, beta = 3, y = 2, n = 3)#posterior is Beta(7,4)
## model alpha beta mean mode var sd
## 1 prior 5 3 0.6250000 0.6666667 0.02604167 0.1613743
## 2 posterior 7 4 0.6363636 0.6666667 0.01928375 0.1388659
#d
plot_beta_binomial(alpha = 7, beta = 4, y = 3, n = 4) #update after 4th trial is success
summarize_beta_binomial(alpha = 7, beta = 4, y = 3, n = 4) #final posterior is Beta(10, 5)
## model alpha beta mean mode var sd
## 1 prior 7 4 0.6363636 0.6666667 0.01928375 0.1388659
## 2 posterior 10 5 0.6666667 0.6923077 0.01388889 0.1178511
# Sequential updating in batches of 5 observations.
# NOTE(review): as above, cumulative (y, n) totals are combined with
# already-updated priors, double-counting earlier batches.
#a
plot_beta_binomial(alpha = 2, beta = 3, y = 3, n = 5) #Updating after first batch has 3 out of 5 successes
summarize_beta_binomial(alpha = 2, beta = 3, y = 3, n = 5) #posterior is Beta(5,5)
## model alpha beta mean mode var sd
## 1 prior 2 3 0.4 0.3333333 0.04000000 0.2000000
## 2 posterior 5 5 0.5 0.5000000 0.02272727 0.1507557
#b
plot_beta_binomial(alpha = 5, beta = 5, y = 4, n = 10) #updating after second set of observations is 1 out of 5 for a total of 4 out of 10 successes
summarize_beta_binomial(alpha = 5, beta = 5, y = 4, n = 10) #posterior is Beta(9,11)
## model alpha beta mean mode var sd
## 1 prior 5 5 0.50 0.5000000 0.02272727 0.1507557
## 2 posterior 9 11 0.45 0.4444444 0.01178571 0.1085620
#c
# NOTE(review): the output below shows the posterior is Beta(14, 21), not
# Beta(9, 16) as the original comment claimed; step (d) then uses Beta(9, 16)
# as its prior, which matches neither value. Verify the intended sequence.
plot_beta_binomial(alpha = 9, beta = 11, y = 5, n = 15) #update after 3rd round of observations with 1 success out of 5 for a total of 5 out of 15 successes
summarize_beta_binomial(alpha = 9, beta = 11, y = 5, n = 15) #posterior is Beta(14,21) per the output, not Beta(9,16)
## model alpha beta mean mode var sd
## 1 prior 9 11 0.45 0.4444444 0.011785714 0.10856203
## 2 posterior 14 21 0.40 0.3939394 0.006666667 0.08164966
#d
plot_beta_binomial(alpha = 9, beta = 16, y = 7, n = 20) #update after 4th trial has 2 out of 5 successes for a total of 7 out of 20 successes
summarize_beta_binomial(alpha = 9, beta = 16, y = 7, n = 20) #final posterior is Beta(16, 29)
## model alpha beta mean mode var sd
## 1 prior 9 16 0.3600000 0.3478261 0.008861538 0.09413574
## 2 posterior 16 29 0.3555556 0.3488372 0.004981213 0.07057771
# Prior check for Beta(4, 3): plot with mean/mode, random-draw quantiles,
# and a numeric summary.
plot_beta(4,3, mean = TRUE, mode = TRUE)
# NOTE(review): no set.seed(), so these quantiles change on re-run.
quantile(rbeta(1000, 4,3))
## 0% 25% 50% 75% 100%
## 0.06857635 0.45418148 0.58287898 0.70782597 0.98727103
summarize_beta(4,3)
## mean mode var sd
## 1 0.5714286 0.6 0.03061224 0.1749636
#The employees' prior understanding shows they think it's more likely that visitors will click on the site (there's a left-skew). The quantile function shows that f(pi) is centered around 0.577; the mean is 0.5714286.
# Each employee's posterior from the shared Beta(4, 3) prior, using
# posterior alpha = alpha + y and posterior beta = beta + n - y.
yclicks <- c(0, 3, 20)            # observed clicks for employees 1-3
nsamp <- c(1, 10, 100)            # employee's sample sizes
a <- 4                            # shared prior alpha
b <- 3                            # shared prior beta
postalpha <- a + yclicks          # posterior alphas: 4, 7, 24
postbeta <- b + nsamp - yclicks   # posterior betas: 4, 10, 83
The posterior models are Beta(4,4), Beta(7,10), and Beta(24,83).
c&d)
# Parts c & d: plot and summarize each employee's Beta-Binomial update,
# confirming the posteriors computed above.
#employee 1
plot_beta_binomial(alpha = 4, beta = 3, y = 0, n = 1)
summarize_beta_binomial(alpha = 4, beta = 3, y = 0, n = 1)
## model alpha beta mean mode var sd
## 1 prior 4 3 0.5714286 0.6 0.03061224 0.1749636
## 2 posterior 4 4 0.5000000 0.5 0.02777778 0.1666667
#employee 2
plot_beta_binomial(alpha = 4, beta = 3, y = 3, n = 10)
summarize_beta_binomial(alpha = 4, beta = 3, y = 3, n = 10)
## model alpha beta mean mode var sd
## 1 prior 4 3 0.5714286 0.6 0.03061224 0.1749636
## 2 posterior 7 10 0.4117647 0.4 0.01345636 0.1160016
#employee 3
plot_beta_binomial(alpha = 4, beta = 3, y = 20, n = 100)
summarize_beta_binomial(alpha = 4, beta = 3, y = 20, n = 100)
## model alpha beta mean mode var sd
## 1 prior 4 3 0.5714286 0.6000000 0.030612245 0.17496355
## 2 posterior 24 83 0.2242991 0.2190476 0.001611009 0.04013738
The first employee had the worst dataset among the three so the prior really influenced the posterior. The second employee has the most balance between his prior and data among the three employees. The final employee has the strength of a dataset with high n so it has a posterior with the least variance among the three and that is heavily influenced by the data. I would rely on the third employee’s model to make decisions about pulling or keeping the ad.
Day 2 will see the addition of 10 data points and 3 successes for a total of y2 = 3 n2 = 11. Using this and pi|(Y=y)~Beta(alpha + y, B + n - y) and our updated prior will give us day 2 posterior of Beta(7,12).
Day 3 will have Beta(7,12) as our new prior. We add 100 data points and 20 successes for y3 = 23 and n3 = 111. alpha + y3 = 30; beta + n3 - y3 = 100 for Beta(30, 100).
# Employee 4's sequential analysis across three days.
# NOTE(review): days 2 and 3 pass cumulative totals (y = 3, n = 11; then
# y = 23, n = 111) together with already-updated priors, double-counting
# earlier days. With incremental data (day 2: y = 3, n = 10; day 3: y = 20,
# n = 100) the sequential result would equal the all-at-once Beta(27, 91)
# posterior computed at the end of this chunk.
#day 1
plot_beta_binomial(alpha = 4, beta = 3, y = 0, n = 1)
summarize_beta_binomial(alpha = 4, beta = 3, y = 0, n = 1) #posterior is Beta(4,4)
## model alpha beta mean mode var sd
## 1 prior 4 3 0.5714286 0.6 0.03061224 0.1749636
## 2 posterior 4 4 0.5000000 0.5 0.02777778 0.1666667
#day 2
plot_beta_binomial(alpha = 4, beta = 4, y = 3, n = 11)
summarize_beta_binomial(alpha = 4, beta = 4, y = 3, n = 11) #posterior is Beta(7,12)
## model alpha beta mean mode var sd
## 1 prior 4 4 0.5000000 0.5000000 0.02777778 0.1666667
## 2 posterior 7 12 0.3684211 0.3529412 0.01163435 0.1078626
#day 3
plot_beta_binomial(alpha = 7, beta = 12, y = 23, n = 111)
summarize_beta_binomial(alpha = 7, beta = 12, y = 23, n = 111) #posterior is Beta(30, 100)
## model alpha beta mean mode var sd
## 1 prior 7 12 0.3684211 0.3529412 0.011634349 0.10786264
## 2 posterior 30 100 0.2307692 0.2265625 0.001355075 0.03681134
#As they collected more data the fourth employee reduced variance in his posterior model. The fourth employee can be more certain of his pi.
plot_beta_binomial(alpha = 4, beta = 3, y = 23, n = 111)
summarize_beta_binomial(alpha = 4, beta = 3, y = 23, n = 111) #posterior is Beta (27,91).
## model alpha beta mean mode var sd
## 1 prior 4 3 0.5714286 0.6000000 0.03061224 0.17496355
## 2 posterior 27 91 0.2288136 0.2241379 0.00148284 0.03850766
#Comparing this to Beta(30, 100) from the sequential analysis, we notice that variance is slightly higher in the non-sequential analysis (0.00148284 versus 0.001355075) and that the means have a small difference. NOTE(review): done without double counting, sequential and batch updating agree exactly, so the difference here reflects the repeated data rather than a benefit of sequential updating.
Import bechdel data.
# Import the bechdel data and tally the 1980 movies by Bechdel-test result:
# 4 PASS of 14, which supplies y = 4, n = 14 for the day-1 update below.
data(bechdel, package = "bayesrules")
bechdel |>
filter(year == 1980) |>
tabyl(binary) |>
adorn_totals("row")
## binary n percent
## FAIL 10 0.7142857
## PASS 4 0.2857143
## Total 14 1.0000000
His first day’s posterior can be modeled and summarized with the below.
# Day 1: flat Beta(1, 1) prior updated with the 1980 data (y = 4, n = 14).
plot_beta_binomial(alpha = 1, beta = 1, y = 4, n = 14)
summarize_beta_binomial(alpha = 1, beta = 1, y = 4, n = 14) #Posterior is Beta(5,11)
## model alpha beta mean mode var sd
## 1 prior 1 1 0.5000 NaN 0.08333333 0.2886751
## 2 posterior 5 11 0.3125 0.2857143 0.01263787 0.1124183
# Tally the 1990 movies: 6 PASS of 15.
bechdel |>
filter(year == 1990) |>
tabyl(binary) |>
adorn_totals("row")
## binary n percent
## FAIL 9 0.6
## PASS 6 0.4
## Total 15 1.0
Day 2 model.
# Day 2 update.
# NOTE(review): y = 10, n = 29 are cumulative totals, but the Beta(5, 11)
# prior already contains the 1980 data, so that year is double-counted. The
# incremental update would be y = 6, n = 15, giving Beta(11, 20).
plot_beta_binomial(alpha = 5, beta = 11, y = 10, n = 29) #total passes so far equals 6 + 4 and updated n size is 14 + 15
summarize_beta_binomial(alpha = 5, beta = 11, y = 10, n = 29) #Posterior is Beta(15,30)
## model alpha beta mean mode var sd
## 1 prior 5 11 0.3125000 0.2857143 0.012637868 0.1124183
## 2 posterior 15 30 0.3333333 0.3255814 0.004830918 0.0695048
# Tally the 2000 movies: 29 PASS of 63.
bechdel |>
filter(year == 2000) |>
tabyl(binary) |>
adorn_totals("row")
## binary n percent
## FAIL 34 0.5396825
## PASS 29 0.4603175
## Total 63 1.0000000
Plot and summary with updated y and n and the new prior from day 2.
# Day 3 update.
# NOTE(review): cumulative totals are again combined with an updated prior,
# re-counting 1980 and 1990. Done incrementally (day 2: y = 6, n = 15;
# day 3: y = 29, n = 63) the sequential posterior is Beta(40, 54), which
# matches the pooled analysis at the end of this document.
plot_beta_binomial(alpha = 15, beta = 30, y = 39, n = 92) #total passes so far equals 6 + 4 + 29 and total n = 63 + 15 + 14
summarize_beta_binomial(alpha = 15, beta = 30, y = 39, n = 92) #Posterior is Beta(54,83)
## model alpha beta mean mode var sd
## 1 prior 15 30 0.3333333 0.3255814 0.004830918 0.06950480
## 2 posterior 54 83 0.3941606 0.3925926 0.001730420 0.04159832
# All three years pooled and analyzed at once with a flat Beta(1, 1) prior.
bechdel |>
filter(year %in% c("1980", "1990", "2000")) |> #filtered by the three years of relevance with code adapted from ModernDive
tabyl(binary) |>
adorn_totals("row")
## binary n percent
## FAIL 53 0.576087
## PASS 39 0.423913
## Total 92 1.000000
plot_beta_binomial(alpha = 1, beta = 1, y = 39, n = 92)
# NOTE(review): this Beta(40, 54) is what a correctly-done sequential
# analysis would also produce; it differs from the Beta(54, 83) above
# because that sequential run re-counted earlier years' data.
summarize_beta_binomial(alpha = 1, beta = 1, y = 39, n = 92) #Posterior is Beta(40, 54)
## model alpha beta mean mode var sd
## 1 prior 1 1 0.5000000 NaN 0.083333333 0.28867513
## 2 posterior 40 54 0.4255319 0.423913 0.002573205 0.05072677
Frequentists often begin their analyses with a null (and alternate) hypothesis which is different from how Bayesians begin by estimating the distribution of probabilities for a set of data. A similarity is that both Bayesians and frequentists allow for new data to change their prior or hypothesis, respectively. Both also have the same subjectivity issues that can come with data collection and management practices.